package com.shujia.spark.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

/**
  * Spark SQL job that counts students per class.
  *
  * Reads headerless, comma-separated student records from `/data/student`,
  * groups them by `clazz`, counts the rows with a non-null `id` in each group,
  * and writes the `(clazz, num)` result as CSV to `/data/clazz_num`,
  * overwriting any previous output.
  *
  * No master URL is set in code, so it has to be supplied by the launcher
  * (for example via `spark-submit --master ...`).
  */
object Demo3SqlSubmit {

  /**
    * Entry point of the job.
    *
    * @param args command-line arguments; currently not used
    */
  def main(args: Array[String]): Unit = {
    // Create the Spark SQL environment. A single shuffle partition keeps the
    // aggregation in one partition, which suits small data sets.
    val spark: SparkSession = SparkSession
      .builder()
      .appName("dsl")
      .config("spark.sql.shuffle.partitions", 1)
      .getOrCreate()

    try {
      // Enables the $"column" syntax.
      import spark.implicits._
      import org.apache.spark.sql.functions.count

      // Read the input data and build a DataFrame with an explicit schema.
      val studentDF: DataFrame = spark
        .read
        .format("csv")
        .option("sep", ",")
        .schema("id STRING,name STRING,age INT,sex STRING,clazz STRING")
        .load("/data/student")

      // Number of students per class.
      val clazzNumDF: DataFrame = studentDF
        .groupBy($"clazz")
        .agg(count($"id").as("num"))

      // Save the result.
      clazzNumDF
        .write
        .format("csv")
        .option("sep", ",")
        .mode(SaveMode.Overwrite)
        .save("/data/clazz_num")
    } finally {
      // Always release the session (and its underlying SparkContext), even
      // when reading, aggregating or writing fails.
      spark.stop()
    }
  }
}